Gender bias in audience of seminars and career position
Data
Data description and summary in script
0_data_summary.
Modeling
Negative binomial mixed-effects model with the year as random intercept.
# Candidate set of negative binomial (nbinom2, log link) mixed models for the
# number of attendees per talk (audience_n). Every model has a random
# intercept for year; the fixed effects cover the additive and interaction
# combinations of speaker gender, academic position (position_cat) and
# affirmative-action period (affirm_action). mg0 is the intercept-only null.
mg0 <- glmmTMB(audience_n ~ 1 + (1 | year), data = data, family = nbinom2)

# Single-predictor models.
mg1 <- glmmTMB(audience_n ~ gender + (1 | year), data = data, family = nbinom2)
mg2 <- glmmTMB(
  audience_n ~ position_cat + (1 | year),
  data = data, family = nbinom2
)
mg3 <- glmmTMB(
  audience_n ~ affirm_action + (1 | year),
  data = data, family = nbinom2
)

# Two predictors, additive.
mg4 <- glmmTMB(
  audience_n ~ gender + position_cat + (1 | year),
  data = data, family = nbinom2
)
mg5 <- glmmTMB(
  audience_n ~ gender + affirm_action + (1 | year),
  data = data, family = nbinom2
)
mg6 <- glmmTMB(
  audience_n ~ affirm_action + position_cat + (1 | year),
  data = data, family = nbinom2
)

# Two predictors with their interaction.
mg7 <- glmmTMB(
  audience_n ~ gender * position_cat + (1 | year),
  data = data, family = nbinom2
)
mg8 <- glmmTMB(
  audience_n ~ gender * affirm_action + (1 | year),
  data = data, family = nbinom2
)
mg9 <- glmmTMB(
  audience_n ~ affirm_action * position_cat + (1 | year),
  data = data, family = nbinom2
)

# Three predictors: additive, one two-way interaction, or full interaction.
mg10 <- glmmTMB(
  audience_n ~ gender + position_cat + affirm_action + (1 | year),
  data = data, family = nbinom2
)
mg11 <- glmmTMB(
  audience_n ~ gender * position_cat + affirm_action + (1 | year),
  data = data, family = nbinom2
)
mg12 <- glmmTMB(
  audience_n ~ gender + position_cat * affirm_action + (1 | year),
  data = data, family = nbinom2
)
mg13 <- glmmTMB(
  audience_n ~ gender * position_cat * affirm_action + (1 | year),
  data = data, family = nbinom2
)

# Rank the candidate set by AIC. `base = TRUE` adds the raw AIC column and
# `weights = TRUE` the Akaike weights (spelled out instead of the reassignable
# shortcuts T/F).
AICtab(
  mg0, mg1, mg2, mg3, mg4, mg5, mg6, mg7, mg8, mg9, mg10, mg11, mg12, mg13,
  base = TRUE, weights = TRUE
) %>%
  kable(digits = 2)
| | AIC | dAIC | df | weight |
|---|---|---|---|---|
| mg11 | 2160.03 | 0.00 | 9 | 0.45 |
| mg10 | 2161.43 | 1.41 | 7 | 0.22 |
| mg7 | 2162.27 | 2.24 | 8 | 0.15 |
| mg4 | 2163.49 | 3.47 | 6 | 0.08 |
| mg12 | 2163.98 | 3.95 | 9 | 0.06 |
| mg13 | 2166.62 | 6.59 | 14 | 0.02 |
| mg6 | 2167.07 | 7.04 | 6 | 0.01 |
| mg2 | 2168.86 | 8.83 | 5 | 0.01 |
| mg9 | 2170.27 | 10.25 | 8 | 0.00 |
| mg5 | 2185.83 | 25.80 | 5 | 0.00 |
| mg1 | 2186.24 | 26.22 | 4 | 0.00 |
| mg8 | 2187.65 | 27.62 | 6 | 0.00 |
| mg0 | 2201.16 | 41.14 | 3 | 0.00 |
| mg3 | 2201.29 | 41.27 | 4 | 0.00 |
Models result
The two most plausible models for audience size (mg11 and mg10, ΔAIC = 1.41) both included gender, academic position and affirmative actions as predictors; they differ in that the best-fitting model (mg11) also includes an interaction between gender and academic position.
## Family: nbinom2 ( log )
## Formula: audience_n ~ gender * position_cat + affirm_action + (1 | year)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 2160.0 2193.3 -1071.0 2142.0 289
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.009051 0.09513
## Number of obs: 298, groups: year, 12
##
## Dispersion parameter for nbinom2 family (): 7.05
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.80057 0.06192 45.23 <2e-16 ***
## genderM 0.11642 0.07417 1.57 0.1165
## position_catpostdoc 0.12249 0.10663 1.15 0.2506
## position_catprofessor 0.16594 0.10469 1.59 0.1130
## affirm_actionafter 0.21633 0.09697 2.23 0.0257 *
## genderM:position_catpostdoc -0.12984 0.14357 -0.90 0.3658
## genderM:position_catprofessor 0.22663 0.12674 1.79 0.0738 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # R2 for Mixed Models
##
## Conditional R2: 0.221
## Marginal R2: 0.180
# Predictions from model mg11 for every combination of academic position,
# gender and affirmative-action period.
myg11 <- ggpredict(mg11, terms = c("position_cat", "gender", "affirm_action"))

# Rename ggpredict's generic `x` and `facet` columns to the variable names used
# in `data`, so the predictions share aesthetics and facets with the raw
# observations. `group` holds the second term (gender).
prs <- as.data.frame(myg11) %>%
  rename(position_cat = x, affirm_action = facet)
# kable(prs, digits = 0)

# Plot 1: audience by academic position, one panel per affirmative-action
# period. Faint dots are observed talks; outlined points with ranges are the
# model predictions and their confidence limits.
ggplot(data, aes(x = position_cat, y = audience_n)) +
  geom_point(
    aes(col = gender),
    position = position_dodge(0.6), alpha = 0.3, size = 3,
    show.legend = FALSE
  ) +
  facet_grid(
    ~affirm_action,
    labeller = as_labeller(c(
      "before" = "Before affirmative actions",
      "after" = "After affirmative actions"
    ))
  ) +
  # scale_color_manual(values = c("#b2abd2", "#fdb863")) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(
    data = prs,
    aes(
      x = position_cat, y = predicted, fill = group,
      ymax = conf.high, ymin = conf.low
    ),
    alpha = 1, position = position_dodge(0.6), size = 1, shape = 21,
    col = "black"
  ) +
  xlab("Academic position") +
  ylab("Audience (N)")

# Plot 2: same data and predictions, with affirmative-action period on the
# x axis and one panel per academic position.
ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(
    aes(col = gender),
    position = position_dodge(0.6), alpha = 0.3, size = 3,
    show.legend = FALSE
  ) +
  facet_grid(
    ~position_cat,
    labeller = as_labeller(c(
      student = "Student",
      postdoc = "Post-doc",
      professor = "Professor"
    ))
  ) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_x_discrete(labels = c("Before", "After")) +
  # scale_y_log10() +
  # Legend title is "Gênero", written with a Unicode escape so it survives
  # re-encoding of the file.
  # NOTE(review): the other figures title this legend "Gender" - confirm which
  # language is intended.
  scale_fill_manual(name = "G\u00eanero", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(
    data = prs,
    aes(
      x = affirm_action, y = predicted, fill = group,
      ymax = conf.high, ymin = conf.low
    ),
    alpha = 1, position = position_dodge(0.6), size = 1.2, shape = 21,
    col = "black"
  ) +
  xlab("Affirmative actions") +
  ylab("Audience (N)")
## Family: nbinom2 ( log )
## Formula: audience_n ~ gender + position_cat + affirm_action + (1 | year)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 2161.4 2187.3 -1073.7 2147.4 291
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## year (Intercept) 0.008642 0.09296
## Number of obs: 298, groups: year, 12
##
## Dispersion parameter for nbinom2 family (): 6.87
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.78546 0.05768 48.29 < 2e-16 ***
## genderM 0.15395 0.05522 2.79 0.0053 **
## position_catpostdoc 0.04292 0.07173 0.60 0.5496
## position_catprofessor 0.33054 0.06264 5.28 1.32e-07 ***
## affirm_actionafter 0.20807 0.09577 2.17 0.0298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # R2 for Mixed Models
##
## Conditional R2: 0.207
## Marginal R2: 0.169
# Predictions from the additive model mg10 for every combination of academic
# position, gender and affirmative-action period.
myg10 <- ggpredict(mg10, terms = c("position_cat", "gender", "affirm_action"))

# Rename ggpredict's generic `x` and `facet` columns to the variable names used
# in `data`; `group` holds the second term (gender).
pr10 <- as.data.frame(myg10) %>%
  rename(position_cat = x, affirm_action = facet)

# Observed audience (faint dots) and mg10 predictions with confidence limits
# (outlined points with ranges), by affirmative-action period, one panel per
# academic position.
ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(
    aes(col = gender),
    position = position_dodge(0.6), alpha = 0.3, size = 3,
    show.legend = FALSE
  ) +
  facet_grid(
    ~position_cat,
    labeller = as_labeller(c(
      student = "Student",
      postdoc = "Post-doc",
      professor = "Professor"
    ))
  ) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_x_discrete(labels = c("Before", "After")) +
  # Legend title is "Gênero", written with a Unicode escape so it survives
  # re-encoding of the file.
  # NOTE(review): other figures title this legend "Gender" - confirm which
  # language is intended.
  scale_fill_manual(name = "G\u00eanero", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(
    data = pr10,
    aes(
      x = affirm_action, y = predicted, fill = group,
      ymax = conf.high, ymin = conf.low
    ),
    alpha = 1, position = position_dodge(0.6), size = 1.2, shape = 21,
    col = "black"
  ) +
  xlab("Affirmative actions") +
  ylab("Audience (N)")
Only professors - productivity metrics
Investigating whether differences in productivity between male and female professors and researchers are related to audience size.
We measured productivity publication metrics from Google Scholar for professors and researchers.
Creating productivity index using PCA 1st axis from metrics.
PCA productivity metrics
# Subset used for the productivity analysis: keep only rows where both
# total_citation_n and nature_index_count are recorded. Columns are referenced
# by bare name (data masking) rather than `data$...`, so the conditions are
# always evaluated on the rows filter() is actually working with.
dp <- data %>%
  filter(!is.na(total_citation_n), !is.na(nature_index_count))

# Number of talks in the subset by speaker gender and affirmative-action period.
table(dp$gender, dp$affirm_action)
##
## before after
## F 14 6
## M 58 9
Productivity publication metrics
# PCA on the productivity metrics of the subset `dp`, without the default
# FactoMineR plots.
# NOTE(review): the metric columns are selected by position (22 to 29), which
# silently breaks if columns are added or reordered upstream - presumably these
# are the publication and institution metrics; consider selecting them by name.
pca1 <- PCA(dp[, 22:29], graph = FALSE)

# Biplot of individuals (coloured by gender, with ellipses) and variables,
# with dashed reference lines through the origin.
# NOTE(review): the axis percentages are hard-coded; confirm them against
# pca1$eig if the data change.
p1 <- fviz_pca_biplot(
  pca1,
  col.ind = dp$gender, addEllipses = TRUE,
  col.ind.sub = "none", geom = "point",
  repel = TRUE
) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  # Legend titles are "Gênero", written with a Unicode escape so they survive
  # re-encoding of the file.
  scale_color_manual(name = "G\u00eanero", values = c("#6D57CF", "#FCA532")) +
  scale_shape(name = "G\u00eanero") +
  scale_fill_manual(name = "G\u00eanero", values = c("#6D57CF", "#FCA532")) +
  ggtitle("Productivity metrics") +
  xlab("PC1 (52%)") +
  ylab("PC2 (21%)") +
  theme_cowplot()
p1
For the analysis specific for professor talks (N=87), the PCA results show that all the productivity metrics for professors were highly correlated (Figure 2B) with the first axis (52% of variance explained) while the institution indexes composed the second PCA axis (21% of variation explained).
Extracting PCA 2 first axes
Modeling
# Candidate negative binomial mixed models for professors' audience (subset
# `dp`). All models include the affirmative-action period and a random
# intercept for year; they differ in whether gender, the productivity index
# (pc1) or both - additively or interacting - are included.
# NOTE(review): `pc1` must already be a column of `dp`; the step that extracts
# the PCA axes is not visible here - confirm it runs before this chunk.
m0 <- glmmTMB(
  audience_n ~ 1 + affirm_action + (1 | year),
  data = dp, family = nbinom2
)
m1 <- glmmTMB(
  audience_n ~ gender + affirm_action + (1 | year),
  data = dp, family = nbinom2
)
# The stray second `+` of the original formula (`pc1 + + affirm_action`) is
# removed; the model terms are unchanged.
m2 <- glmmTMB(
  audience_n ~ pc1 + affirm_action + (1 | year),
  data = dp, family = nbinom2
)
m3 <- glmmTMB(
  audience_n ~ gender + pc1 + affirm_action + (1 | year),
  data = dp, family = nbinom2
)
m4 <- glmmTMB(
  audience_n ~ gender * pc1 + affirm_action + (1 | year),
  data = dp, family = nbinom2
)

# Rank the candidates by AIC, showing raw AIC values and Akaike weights.
AICtab(m0, m1, m2, m3, m4, base = TRUE, weights = TRUE) %>%
  kable(digits = 2)
| | AIC | dAIC | df | weight |
|---|---|---|---|---|
| m3 | 691.32 | 0.00 | 6 | 0.60 |
| m4 | 692.95 | 1.64 | 7 | 0.27 |
| m2 | 695.04 | 3.73 | 5 | 0.09 |
| m1 | 696.94 | 5.62 | 5 | 0.04 |
| m0 | 702.13 | 10.82 | 4 | 0.00 |
Model results
## Family: nbinom2 ( log )
## Formula: audience_n ~ gender + pc1 + affirm_action + (1 | year)
## Data: dp
##
## AIC BIC logLik deviance df.resid
## 691.3 706.1 -339.7 679.3 81
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## year (Intercept) 1.622e-09 4.027e-05
## Number of obs: 87, groups: year, 11
##
## Dispersion parameter for nbinom2 family (): 5.52
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.03070 0.11415 26.550 <2e-16 ***
## genderM 0.30566 0.12366 2.472 0.0134 *
## pc1 0.07246 0.02626 2.759 0.0058 **
## affirm_actionafter 0.27050 0.13267 2.039 0.0415 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Random effect variances not available. Returned R2 does not account for random effects.
## # R2 for Mixed Models
##
## Conditional R2: NA
## Marginal R2: 0.206
We used the first PCA axis as a predictor, together with gender, to explain professors' audience and found that, as expected, audience increases with the productivity index (first PCA axis), but female professors still had, on average, an audience 1.4 times smaller than that of male professors.
# Predicted audience along the productivity index (pc1) for each gender, from
# the additive model m3. Ribbons span conf.low to conf.high; dots are the
# observed talks in `dp`.
my3 <- ggpredict(m3, terms = c("pc1", "gender"))
my3 <- as.data.frame(my3)
ggplot(my3, aes(x = x, y = predicted, col = group)) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high, fill = group),
    alpha = 0.3, colour = NA
  ) +
  geom_line() +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() +
  ggtitle("") +
  ylab("Audience (N)") +
  xlab("Productivity index (PC1 axis)") +
  geom_point(
    data = dp,
    aes(x = pc1, y = audience_n, col = gender),
    alpha = 0.6
  )

# Same plot for m4, which adds the gender x pc1 interaction.
my4 <- ggpredict(m4, terms = c("pc1", "gender")) %>%
  as.data.frame()
ggplot(my4, aes(x = x, y = predicted, col = group)) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high, fill = group),
    alpha = 0.3, colour = NA
  ) +
  geom_line() +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() +
  ggtitle("") +
  ylab("Audience (N)") +
  xlab("Productivity index (PC1 axis)") +
  geom_point(
    data = dp,
    aes(x = pc1, y = audience_n, col = gender),
    alpha = 0.6
  )
Figure audience
# Composite figure: panel A (audience by affirmative-action period and
# academic position, with mg11 predictions), panel B (PCA biplot of the
# productivity metrics) and panel C (audience against the productivity index,
# with m3 predictions).

# Panel A ----
# mg11 predictions with ggpredict's `x` / `facet` columns renamed to the
# variable names used in `data`.
prs <- as.data.frame(myg11) %>%
  rename(affirm_action = facet, position_cat = x)
f1 <- ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(
    aes(col = gender),
    position = position_dodge(0.6), alpha = 0.3, size = 3,
    show.legend = FALSE
  ) +
  facet_grid(~position_cat) +
  # scale_color_manual(values = c("#b2abd2", "#fdb863")) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(
    data = prs,
    aes(
      x = affirm_action, y = predicted, fill = group,
      ymax = conf.high, ymin = conf.low
    ),
    alpha = 1, position = position_dodge(0.6), size = 1, shape = 21,
    col = "black"
  ) +
  ylab("Audience (N)") +
  xlab("Affirmative actions") +
  labs(tag = "A")

# Panel C ----
# m3 predictions along pc1 for each gender, over the observed talks in `dp`.
my3 <- ggpredict(m3, terms = c("pc1", "gender"))
my3 <- as.data.frame(my3)
# my3$prof <- "Professors only"
f2 <- ggplot(my3, aes(x = x, y = predicted, col = group)) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high, fill = group),
    alpha = 0.3, colour = NA
  ) +
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line width in favour
  # of `linewidth`; switch once the minimum ggplot2 version allows it.
  geom_line(size = 1.5) +
  # facet_grid(~prof) +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() +
  ggtitle("") +
  ylab("Audience (N)") +
  xlab("Productivity index (PC1 axis)") +
  geom_point(
    data = dp,
    aes(x = pc1, y = audience_n, col = gender),
    alpha = 0.6, size = 2
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, vjust = 2, hjust = 0.5)
  ) +
  labs(tag = "C", title = "Professors' audience")

# Panel B ----
# PCA biplot coloured by gender; its own legend is suppressed.
p1 <- fviz_pca_biplot(
  pca1,
  col.ind = dp$gender, addEllipses = TRUE,
  col.ind.sub = "none", geom = "point",
  repel = TRUE
) +
  facet_grid(. ~ .) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(name = "gender", values = c("#6D57CF", "#FCA532")) +
  scale_shape(name = "gender") +
  scale_fill_manual(name = "gender", values = c("#6D57CF", "#FCA532")) +
  labs(title = "Professors' productivity", tag = "B") +
  xlab("PC1 (52%)") +
  ylab("PC2 (21%)") +
  theme_cowplot() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, vjust = 2, hjust = 0.5)
  ) # +
# coord_cartesian(clip = "off") +
# scale_x_continuous(limits = c(-6, 8), expand = c(0, 0)) +
# scale_y_continuous(limits = c(-4, 8), breaks = c(-4, -2, 0, 2, 4, 6))
# annotate("rect", xmin = -6, xmax = 8, ymin = 7, ymax = 8, fill = "gray85") +
# annotate("text", label = "Professors'productivity", x = 0, y = 7.5, fill = "gray85")

# Layout: panel A on top, panels B and C side by side below, with legends
# collected into a single guide area.
f1 / (p1 + f2) + plot_layout(guides = "collect", heights = c(0.9, 1))